package com.meiyuetao.myt.crawl.filter;

import java.util.Map;

import lab.s2jh.crawl.filter.ParseFilterChain;

import org.apache.commons.lang3.StringUtils;
import org.joda.time.DateTime;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.gargoylesoftware.htmlunit.html.HtmlElement;
import com.gargoylesoftware.htmlunit.html.HtmlPage;
import com.google.common.collect.Maps;
import com.meiyuetao.myt.crawl.entity.ParseCommodity;

/**
 * Parse filter for single Taobao item pages.
 *
 * <p>{@link #doFilterInternal(String, ParseFilterChain)} fetches the page, extracts title,
 * price, description, window images and stock through the helpers inherited from
 * {@link AbstractCommodityParseFilter}, and saves the resulting {@link ParseCommodity}.
 * {@link #parseSimpleData(String)} extracts a lightweight title/price/picture map without
 * persisting anything.
 */
public class TaoBaoSingleParseFilter extends AbstractCommodityParseFilter {

    private static final Logger logger = LoggerFactory.getLogger(TaoBaoSingleParseFilter.class);

    // NOTE(review): not referenced anywhere in this class; kept in case it is reserved for
    // review-page crawling that is not implemented yet -- confirm before removing.
    private static final int MAX_REVIEW_PAGES = 5;

    /** XPath of the item title heading. */
    private static final String TITLE_XPATH = "//DIV[@id='detail']//DIV[@class='tb-detail-hd']//H3";

    /** XPath of the promotional price; tried first. */
    private static final String PROMO_PRICE_XPATH = "//LI[@id='J_PromoPrice']//STRONG[@class='tb-rmb-num']";

    /** XPath of the regular price; used when no promotional price node is found. */
    private static final String DEFAULT_PRICE_XPATH = "//STRONG[@id='J_StrPrice']//EM[@class='tb-rmb-num']";

    /** XPath of the main ("booth") product image. */
    private static final String BOOTH_IMG_XPATH = "//IMG[@id='J_ImgBooth']";

    /** Attribute that carries the image URL on lazily loaded images. */
    private static final String LAZY_LOAD_ATTR = "data-lazyload";

    /**
     * Fetches the page at {@code url}, parses the commodity attributes and saves them.
     *
     * <p>An existing {@link ParseCommodity} with the same {@code baseUrl} is reused; otherwise a
     * new one is created with {@code url} as its uid.
     *
     * @param url         the item page URL
     * @param filterChain the filter chain; NOTE(review): it is not invoked in this method --
     *                    confirm whether the base class is responsible for continuing the chain
     */
    @Override
    public void doFilterInternal(String url, ParseFilterChain filterChain) {
        logger.debug("Invoking {} ...", this.getClass());
        HtmlPage htmlPage = fetchHtmlPage(url);
        ParseCommodity parseCommodity = parseCommodityService.findByProperty("baseUrl", url);
        if (parseCommodity == null) {
            parseCommodity = new ParseCommodity(url);
            // Source group identifier; normally the main domain of the crawled site.
            parseCommodity.setSourceType("taobao.com");
            parseCommodity.setUid(url);
        }
        parseCommodity.reset();

        String title = parseTitle(htmlPage, TITLE_XPATH);
        parseCommodity.setTitle(title);

        // Sale price
        parseSalePrice(parseCommodity, htmlPage, PROMO_PRICE_XPATH, DEFAULT_PRICE_XPATH);
        // Item description
        parseDescription(parseCommodity, htmlPage, "//DIV[@id='J_DivItemDesc']", "//DIV[@id='J_DivItemDesc']//IMG", LAZY_LOAD_ATTR, null);
        // Window (thumbnail) images
        parseWindowImgs(parseCommodity, htmlPage, "//UL[@id='J_UlThumb']//IMG", BOOTH_IMG_XPATH, LAZY_LOAD_ATTR);
        // Stock
        parseSaleStock(parseCommodity, htmlPage, "//SPAN[@id='J_SpanStock']");

        logger.debug("Saving Parse Commodity: {}", parseCommodity);
        DateTime fetchTime = new DateTime();
        parseCommodity.setLastFetchTime(fetchTime.getMillis());
        parseCommodity.setLastFetchTimeLabel(fetchTime.toString("yyyy-MM-dd HH:mm:ss"));
        parseCommodityService.save(parseCommodity);
    }

    /**
     * Extracts a minimal summary of the item page at {@code url}.
     *
     * @param url the item page URL
     * @return an insertion-ordered map with the keys {@code "title"}, {@code "salePrice"} and
     *         {@code "pic"}, or {@code null} when the URL is not accepted by this filter, no
     *         price could be read, or the main image element is missing
     */
    @Override
    public Map<String, Object> parseSimpleData(String url) {
        if (!isAcceptUrl(url)) {
            return null;
        }

        HtmlPage htmlPage = fetchHtmlPage(url);
        Map<String, Object> jsonMap = Maps.newLinkedHashMap();

        String title = parseTitle(htmlPage, TITLE_XPATH);
        jsonMap.put("title", title);

        // Sale price: prefer the promotional price, fall back to the regular one.
        HtmlElement salePriceNode = htmlPage.getFirstByXPath(PROMO_PRICE_XPATH);
        if (salePriceNode == null) {
            salePriceNode = htmlPage.getFirstByXPath(DEFAULT_PRICE_XPATH);
        }
        String salePrice = salePriceNode == null ? "" : salePriceNode.asText();
        salePrice = cleanInvisibleChar(salePrice);
        if (StringUtils.isBlank(salePrice)) {
            return null;
        }
        // Trim before inspecting the first character so that the character that is tested is
        // the same one that gets stripped.
        salePrice = salePrice.trim();
        char first = salePrice.charAt(0);
        if (first < '0' || first > '9') {
            // Drop a single leading non-digit, e.g. a currency symbol.
            salePrice = salePrice.substring(1).trim();
        }
        if (salePrice.isEmpty()) {
            // Only a symbol was present: treat it the same as a missing price.
            return null;
        }
        jsonMap.put("salePrice", salePrice);

        HtmlElement picNode = htmlPage.getFirstByXPath(BOOTH_IMG_XPATH);
        if (picNode == null) {
            return null;
        }
        // Prefer the lazy-load attribute and fall back to the plain src attribute.
        String src = parseImgSrc(url, picNode.getAttribute(LAZY_LOAD_ATTR));
        if (StringUtils.isBlank(src)) {
            src = parseImgSrc(url, picNode.getAttribute("src"));
        }
        // Record the picture whichever attribute supplied it.
        jsonMap.put("pic", src);

        return jsonMap;
    }

}
